library(quadprog)


## This function calculates the estimated average potential outcomes Y(z,a) and a conservertive  covariance matrix estimate

## CalAPO_df takes vectors as input and CalAPO takes vectors of lists as input.
CalAPO_df <- function (id,Z,A,Y){
  n1 <- tapply(Z,id,sum)
  n <- tapply(Z,id,length)
  n0 <- n-n1
  
  A.cluster <- tapply(A,id,mean)
  Ja <- table(A.cluster)
  J <- sum(Ja)
  m <- length(Ja)
  #######  point estimators
  
  Y1j.hat <- rep(0,J)
  Y0j.hat <- rep(0,J)
  
  Y1j.hat <- tapply(Y*Z,id,sum)/n1
  Y0j.hat <- tapply(Y*(1-Z),id,sum)/n0
  
  ###  estiamted APO   (Y(1,1),Y(0,1)...,Y(1,m),Y(0,m))
  Y.hat <-  rep(0,2*m)
  for (a in 1:m){
    Y.hat[2*a-1] <-  sum(Y1j.hat* (A.cluster==a))/Ja[a]
    Y.hat[2*a] <-  sum(Y0j.hat* (A.cluster==a))/Ja[a]
  }
  
  ## covariance matrix est
  cov.hat <-  array(0,dim=c(2*m,2*m))
  
  
  for (a in 1:m){
    cov.hat[2*a-1,2*a-1] <-   1/Ja[a]*sum((Y1j.hat-Y.hat[2*a-1] )^2*(A.cluster==a))/(Ja[a]-1)
    cov.hat[2*a,2*a] <-   1/Ja[a]*sum((Y0j.hat-Y.hat[2*a] )^2*(A.cluster==a))/(Ja[a]-1)
    cov.hat[2*a,2*a-1] <-   1/Ja[a]*sum((Y1j.hat-Y.hat[2*a-1] )*(Y0j.hat-Y.hat[2*a])*(A.cluster==a))/(Ja[a]-1)
    cov.hat[2*a-1,2*a] <- cov.hat[2*a,2*a-1]
  }
  
  return(list(Y.hat=Y.hat, cov.hat = cov.hat))
}



### Testing the hypotheses of DE=0,MDE=0, SE=0 
Test2SRE <- function(id,Z,A,Y,qa,effect = "DE"){
 est <-  CalAPO_df(id,Z,A,Y)
  m <- length(table(A))
  C1 <-  array(0,dim=c(m,2*m))
  C2 <-  rep(0,2*m)
  
  for (a in 1:m){
    C1[a,2*a-1] <-  1
    C1[a,2*a] <-  -1
    C2[2*a-1] <-  qa[a]
    C2[2*a] <-  -qa[a]
  }
  
  C3 = array(0,dim=c(2*m-2,2*m))
  for ( a in 1:(m-1)){
    C3[a,2*a-1] <- 1
    C3[a,2*a+1] <- -1
    C3[m-1+a,2*a] <- 1
    C3[m-1+a,2*a+2] <- -1
  }
  
  
  if ( effect == "DE"){ 
    rej <-  (t(C1%*%est$Y.hat)%*%solve(C1%*%est$cov.hat%*%t(C1))%*%(C1%*%est$Y.hat)) > qchisq(0.95,m)
  }
  if (effect == "MDE"){
    rej <-  (sum(C2*est$Y.hat))^2/(t(C2)%*%est$cov.hat%*%(C2)) > qchisq(0.95,1)
  }
  if (effect == "SE"){
    rej <-  (t(C3%*%est$Y.hat)%*%solve(C3%*%est$cov.hat%*%t(C3))%*%(C3%*%est$Y.hat)) >  qchisq(0.95,2*m-2)
  }
  
  return(drop(rej))
}


#######  Sample size formula
#### Calculate s^2 in the paper

Funs = function(x,m,alpha=0.05, beta=0.2 ){
  chi <-  qchisq(1-alpha, m)
  return (  pchisq(chi,m,ncp=x)  - beta    )
}

Cals = function(m, alpha=0.05, beta=0.2){
  uniroot(Funs,m=m, alpha=alpha,beta=beta,lower=0,upper=100)$root
}


####  minimize  s'(C3 D.hat C3)^{-1}s under max |ASE(z,a,a')|=1 for z=0,1
## The function enumerates all the possible situations when max |ASE(z;a,a')|=1 for z=0,1


### this may not be efficient 

quadprogSE = function(D.se){
  m <-  (dim(D.se)[1]+2)/2
  
  # quadratic function
  D <-  2*solve(D.se)
  
  # construct the constraint  A s \geq s0
  A.sub <-  NULL
  for ( i in 1:(m-1)){
    for (j in i:(m-1)){
      A.sub <- rbind(A.sub, c(rep(0,i-1), rep(1,j+1-i),rep(0,m-1-j)))
    }
  }
  zeros <-  array(0,dim=c(m*(m-1)/2,m-1))
  A1 <-  cbind(A.sub,zeros)
  A1 <-  rbind(A1,-A1)
  A0 <-  cbind(zeros,A.sub)
  A0 <-  rbind(A0,-A0)
  A <-  rbind(A1,A0)
  min <-  100
  for ( i in 1:(m*(m-1)/2)){
    for ( j in 1:(m*(m-1)/2)){
    s0 <-  rep(-1,2*m*(m-1))
    s0[i] <-  s0[i]+2
    s0[m*(m-1)+j] <-  s0[m*(m-1)+j]+2
    res <- solve.QP(Dmat=D, dvec = rep(0,2*m-2),Amat = t(A),bvec= s0)
    if (res$value<min){min <-  res$value}
    }
  }
  return(min)
}



##### parameters for sample size calculation

Calpara <- function(id,Z,A,Y){
  # number of clusters
  A.cluster <- tapply(A,id,mean)
  Ja <- table(A.cluster)
  J <- sum(Ja)
  m <- length(Ja)
  
  n1 <- tapply(Z,id,sum)
  n0 <- tapply(1-Z,id,sum)
  n <- n1+n0
  n.bar <- J/sum(1/n)
  
  id.cluster <- unique(data$id)
  
  
  est.Yj <-  array(dim=c(J,2))
  est.sigmaj <- array(dim=c(J,2))
  
  for (j in 1:J){
    Z.sub <-  Z[id==id.cluster[j]]
    Y.sub <-  Y[id==id.cluster[j]]
    n1.sub <- sum(Z.sub)
    n0.sub <- sum(1-Z.sub)
    est.Yj[j,2] <- sum(Z.sub*Y.sub)/n1.sub
    est.Yj[j,1] <- sum((1-Z.sub)*Y.sub)/n0.sub
    est.sigmaj [j,2] <-   sum( (Y.sub-est.Yj[j,2])^2*Z.sub)/(n1.sub-1)
    est.sigmaj [j,1] <-   sum( (Y.sub-est.Yj[j,1])^2*(1-Z.sub))/(n0.sub-1)
  }
  
  est.Y <-  array(0,dim=c(2,m))
  sigmab.hat <- array(0,dim=c(2,m))
  
  for ( a in 1:m){
    for (z in 1:2){
      est.Y[z,a] <- sum(est.Yj[,z]*(A.cluster==a))/Ja[a]
      sigmab.hat[z,a] <- sum((est.Yj[,z]-est.Y[z,a])^2*(A.cluster==a))/(Ja[a]-1)
    }
  }
  
  #### E(sigmaw.hat) = sigmaw
  sigmaw <- mean(est.sigmaj)
  #### E(sigmab.hat) = sigmab+...
  sigmab <- array(0,dim=c(2,m))
  for ( a in 1:m){
    sigmab[1,a] <- sigmab.hat[1,a] - sum((1-n0/n)*1/n0*est.sigmaj[,1])/J
    sigmab[2,a] <- sigmab.hat[2,a] - sum((1-n1/n)*1/n1*est.sigmaj[,2])/J
    
  }
  sigmab <- mean(sigmab)
  
  
  return(list(sigmaw = sigmaw,sigmab = sigmab,r=sigmab/(sigmab+sigmaw), n.bar = n.bar))
  
}


###  sigma: the total variance
### r is the intra-class correlationn coefficient
### mu is the effect size
### pa is the treated propotion under different treatment assignment mechnisms
### qa is the proportion of different treatment assignment mechnisms




Calsamplesize = function (mu,n.bar,qa, pa, r, sigma=1, alpha=0.05, beta=0.2){  
  m <-  length(qa)  
  D0 <-  array(0,dim=c(2*m,2*m))
  
  for (i in 1:m){
    D0[2*i-1,2*i-1] <-  (r+(1-pa[i])*(1-r)/n.bar/pa[i])/qa[i]
    D0[2*i,2*i] <-  (r+pa[i]*(1-r)/n.bar/(1-pa[i]))/qa[i]
    D0[2*i-1,2*i] <-  0
    D0[2*i,2*i-1] <-  0
  }
  
  ## constrast matrices
  C1 <-  array(0,dim=c(m,2*m))
  C2 <-  rep(0,2*m)
  
  for (i in 1:m){
    C1[i,2*i-1] <-  1
    C1[i,2*i] <-  -1
    C2[2*i-1] <-  qa[i]
    C2[2*i] <-  -qa[i]
  }
  
  C3 <-  array(0,dim=c(2*m-2,2*m))
  for ( i in 1:(m-1)){
    C3[i,2*i-1] <- 1
    C3[i,2*i+1] <- -1
    C3[m-1+i,2*i] <- 1
    C3[m-1+i,2*i+2] <- -1
  }
  
  #### sample size formulas
  
  ## direct effect
  s1 <-   Cals(m,alpha,beta)
  J.DE <-      s1/sum( 1/diag(C1 %*% D0 %*% t(C1)) )*sigma/mu^2  
  
  ## marginal direct effect
  
  s2 <-   Cals(1,alpha,beta)
  J.MDE <-      s2*t(C2) %*% D0 %*% (C2)*sigma/mu^2
  
  
  ##### spillover effect
  s3  <-   Cals(2*m-2,alpha,beta)
  
  J.SE <-  s3*sigma/quadprogSE(C3 %*% D0 %*% t(C3))/mu^2
  
  samplesize <-  c(J.DE,J.MDE,J.SE)
  return (samplesize)	    
}